In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Load only the columns this analysis uses and parse the incident timestamp
# into a datetime column.
# `infer_datetime_format` is intentionally not passed: it has been deprecated
# since pandas 2.0 (format inference is the default there) and only emits a
# warning; on older versions omitting it affects parsing speed, not the result.
df = pd.read_csv(
    'Utah_County_Sheriff_s_Office.csv',
    usecols=[
        "incident_type_primary",
        "city",
        "latitude",
        "longitude",
        "incident_datetime",
        "parent_incident_type",
    ],
    parse_dates=["incident_datetime"],
)
# Subset of rows whose city is exactly "EAGLE MOUNTAIN".
eagle = df[df['city'] == "EAGLE MOUNTAIN"]
# Display the column dtypes (confirms whether incident_datetime was parsed).
eagle.dtypes
Out[2]:
In [3]:
# Show the first five Eagle Mountain rows when ordered by incident time.
eagle.sort_values(by="incident_datetime").head(n=5)
Out[3]:
In [4]:
# Number of Eagle Mountain rows per parent incident type, most frequent first.
eagle["parent_incident_type"].value_counts()
Out[4]:
In [5]:
# Time series of Eagle Mountain rows whose parent type is 'Property Crime'.
# Each incident contributes a 1 at its timestamp; summing over resample bins
# yields a count per bin. Note the bins are three months wide ('3M') even
# though the result is named `thefts_by_mo`.
theft_times = eagle.loc[
    eagle["parent_incident_type"] == 'Property Crime',
    "incident_datetime",
].values
thefts = pd.Series(np.ones(len(theft_times), dtype="int64"), index=theft_times)
thefts_by_mo = thefts.resample(rule='3M').sum()
thefts_by_mo.plot()
Out[5]:
In [6]:
# Same 'Property Crime' count series, but over the full dataset (`df`) rather
# than the Eagle Mountain subset. One 1 per incident, summed into three-month
# ('3M') bins — the `_by_mo` name notwithstanding.
theft_times = df.loc[
    df["parent_incident_type"] == 'Property Crime',
    "incident_datetime",
].values
thefts = pd.Series(np.ones(len(theft_times), dtype="int64"), index=theft_times)
thefts_by_mo = thefts.resample(rule='3M').sum()
thefts_by_mo.plot()
Out[6]:
In [7]:
# Number of Eagle Mountain rows per primary incident type, most frequent first.
eagle["incident_type_primary"].value_counts()
Out[7]:
In [8]:
# Export the timestamp and coordinates of every Eagle Mountain row whose
# primary type is "THEFT-PROPERTY-FROM MOTOR VEHI"; the row index is not written.
eagle.loc[
    eagle["incident_type_primary"] == "THEFT-PROPERTY-FROM MOTOR VEHI",
    ["incident_datetime", "latitude", "longitude"],
].to_csv("thefts_from_vehicles.csv", index=False)
In [9]:
# Monthly ('1M') count of rows whose primary type is
# "THEFT-PROPERTY-FROM MOTOR VEHI", taken from the full dataset (`df`),
# not the Eagle Mountain subset. One 1 per incident, summed per bin.
theft_times = df.loc[
    df["incident_type_primary"] == "THEFT-PROPERTY-FROM MOTOR VEHI",
    "incident_datetime",
].values
thefts = pd.Series(np.ones(len(theft_times), dtype="int64"), index=theft_times)
thefts_by_mo = thefts.resample(rule='1M').sum()
thefts_by_mo.plot()
Out[9]:
In [19]:
from scipy import stats
In [64]:
# 2-D density of reported thefts from motor vehicles.
# NOTE(review): despite the `em_` prefix this filters the full `df`, not the
# `eagle` subset — confirm that is intended.
em_thefts = df.loc[
    df.incident_type_primary == "THEFT-PROPERTY-FROM MOTOR VEHI",
    ['latitude', 'longitude'],
].dropna()
# Rows with a missing coordinate are dropped above because stats.zscore
# propagates NaN by default: a single NaN turns the whole column's z-scores
# into NaN, every comparison below becomes False, and the heatmap ends up empty.

# Keep only points whose latitude AND longitude both lie within .12501
# standard deviations of their column mean (a tight window around the centre).
et = em_thefts[(np.abs(stats.zscore(em_thefts)) < .12501).all(axis=1)]
x = et.latitude.values
y = et.longitude.values

# Longitude is passed first, so `xedges` holds longitude bin edges, `yedges`
# holds latitude bin edges, and heatmap[i, j] is (longitude bin i, latitude bin j).
heatmap, xedges, yedges = np.histogram2d(y, x, bins=100)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

plt.clf()
# Transpose so rows map to latitude (vertical) and columns to longitude
# (horizontal); origin='lower' puts the smallest latitude at the bottom.
plt.imshow(heatmap.T, extent=extent, origin='lower')
plt.xlabel("longitude")
plt.ylabel("latitude")
plt.show()
In [77]:
# Find the densest heatmap cell. `heatmap` came from
# np.histogram2d(longitude, latitude), so axis 0 (index `i`) pairs with the
# longitude edges in `xedges` and axis 1 (index `j`) with the latitude edges
# in `yedges`.
i, j = np.unravel_index(heatmap.argmax(), heatmap.shape)

# Bin k spans edges[k] .. edges[k + 1], so two consecutive edges are needed to
# get its centre. (A `k:k+1` slice yields only the lower edge.)
hotspot_lat = float(np.mean(yedges[j:j + 2]))
hotspot_lon = float(np.mean(xedges[i:i + 2]))

# Directions link to the centre of the busiest cell, as "lat,lon".
"https://www.google.com/maps?daddr=%s,%s" % (hotspot_lat, hotspot_lon)
Out[77]:
In [ ]: